rm(list = ls())

library(Lahman)
package 㤼㸱Lahman㤼㸲 was built under R version 4.0.5
library(mosaic)
Registered S3 method overwritten by 'htmlwidgets':
  method           from         
  print.htmlwidget tools:rstudio
Registered S3 method overwritten by 'mosaic':
  method                           from   
  fortify.SpatialPolygonsDataFrame ggplot2

The 'mosaic' package masks several functions from core packages in order to add 
additional features.  The original behavior of these functions should not be affected by this.

Attaching package: 㤼㸱mosaic㤼㸲

The following objects are masked from 㤼㸱package:dplyr㤼㸲:

    count, do, tally

The following object is masked from 㤼㸱package:Matrix㤼㸲:

    mean

The following object is masked from 㤼㸱package:ggplot2㤼㸲:

    stat

The following objects are masked from 㤼㸱package:stats㤼㸲:

    binom.test, cor, cor.test, cov, fivenum, IQR, median, prop.test, quantile, sd, t.test, var

The following objects are masked from 㤼㸱package:base㤼㸲:

    max, mean, min, prod, range, sample, sum
library(tidyr)
package 㤼㸱tidyr㤼㸲 was built under R version 4.0.5
Attaching package: 㤼㸱tidyr㤼㸲

The following objects are masked from 㤼㸱package:Matrix㤼㸲:

    expand, pack, unpack
library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
  method         from
  print.tbl_lazy     
  print.tbl_sql      
-- Attaching packages ----------------------------------------------------------------------------------- tidyverse 1.3.0 --
v tibble  3.1.0     v stringr 1.4.0
v readr   1.4.0     v forcats 0.5.0
v purrr   0.3.4     
package 㤼㸱tibble㤼㸲 was built under R version 4.0.5-- Conflicts -------------------------------------------------------------------------------------- tidyverse_conflicts() --
x mosaic::count()            masks dplyr::count()
x purrr::cross()             masks mosaic::cross()
x mosaic::do()               masks dplyr::do()
x tidyr::expand()            masks Matrix::expand()
x dplyr::filter()            masks stats::filter()
x ggstance::geom_errorbarh() masks ggplot2::geom_errorbarh()
x dplyr::lag()               masks stats::lag()
x tidyr::pack()              masks Matrix::pack()
x mosaic::stat()             masks ggplot2::stat()
x mosaic::tally()            masks dplyr::tally()
x tidyr::unpack()            masks Matrix::unpack()
library(dplyr)
library(mplot)
package 㤼㸱mplot㤼㸲 was built under R version 4.0.5
Attaching package: 㤼㸱mplot㤼㸲

The following object is masked from 㤼㸱package:mosaic㤼㸲:

    mplot
library(ggplot2)
library(cluster)
library(factoextra)
package 㤼㸱factoextra㤼㸲 was built under R version 4.0.5Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(corrplot)
corrplot 0.92 loaded
library(data.table)
package 㤼㸱data.table㤼㸲 was built under R version 4.0.5Registered S3 method overwritten by 'data.table':
  method           from
  print.data.table     
data.table 1.14.0 using 4 threads (see ?getDTthreads).  Latest news: r-datatable.com

Attaching package: 㤼㸱data.table㤼㸲

The following object is masked from 㤼㸱package:purrr㤼㸲:

    transpose

The following objects are masked from 㤼㸱package:dplyr㤼㸲:

    between, first, last
library(mod)
package 㤼㸱mod㤼㸲 was built under R version 4.0.5
Attaching package: 㤼㸱mod㤼㸲

The following object is masked from 㤼㸱package:Matrix㤼㸲:

    drop

The following object is masked from 㤼㸱package:base㤼㸲:

    drop
library(modelr)

Attaching package: 㤼㸱modelr㤼㸲

The following object is masked from 㤼㸱package:mosaic㤼㸲:

    resample

The following object is masked from 㤼㸱package:ggformula㤼㸲:

    na.warn
#Load in People, Batting, and Pitching Dataframes
data("People") 
data("Batting")
data("Pitching")
#Merges player name to Batting data. 
bstats <- battingStats()
    str(bstats)
'data.frame':   108789 obs. of  29 variables:
 $ playerID: chr  "abercda01" "addybo01" "allisar01" "allisdo01" ...
 $ yearID  : int  1871 1871 1871 1871 1871 1871 1871 1871 1871 1871 ...
 $ stint   : int  1 1 1 1 1 1 1 1 1 1 ...
 $ teamID  : Factor w/ 149 levels "ALT","ANA","ARI",..: 136 111 39 142 111 56 111 24 56 24 ...
 $ lgID    : Factor w/ 7 levels "AA","AL","FL",..: 4 4 4 4 4 4 4 4 4 4 ...
 $ G       : int  1 25 29 27 25 12 1 31 1 18 ...
 $ AB      : int  4 118 137 133 120 49 4 157 5 86 ...
 $ R       : int  0 30 28 28 29 9 0 66 1 13 ...
 $ H       : int  0 32 40 44 39 11 1 63 1 13 ...
 $ X2B     : int  0 6 4 10 11 2 0 10 1 2 ...
 $ X3B     : int  0 0 5 2 3 1 0 9 0 1 ...
 $ HR      : int  0 0 0 2 0 0 0 0 0 0 ...
 $ RBI     : int  0 13 19 27 16 5 2 34 1 11 ...
 $ SB      : int  0 8 3 1 6 0 0 11 0 1 ...
 $ CS      : int  0 1 1 1 2 1 0 6 0 0 ...
 $ BB      : int  0 4 2 0 2 0 1 13 0 0 ...
 $ SO      : int  0 0 5 2 1 1 0 1 0 0 ...
 $ IBB     : int  NA NA NA NA NA NA NA NA NA NA ...
 $ HBP     : int  NA NA NA NA NA NA NA NA NA NA ...
 $ SH      : int  NA NA NA NA NA NA NA NA NA NA ...
 $ SF      : int  NA NA NA NA NA NA NA NA NA NA ...
 $ GIDP    : int  0 0 1 0 0 0 0 1 0 0 ...
 $ BA      : num  0 0.271 0.292 0.331 0.325 0.224 0.25 0.401 0.2 0.151 ...
 $ PA      : num  4 122 139 133 122 49 5 170 5 86 ...
 $ TB      : num  0 38 54 64 56 15 1 91 2 17 ...
 $ SlugPct : num  0 0.322 0.394 0.481 0.467 0.306 0.25 0.58 0.4 0.198 ...
 $ OBP     : num  0 0.295 0.302 0.331 0.336 0.224 0.4 0.447 0.2 0.151 ...
 $ OPS     : num  0 0.617 0.696 0.812 0.803 ...
 $ BABIP   : num  0 0.271 0.303 0.326 0.328 0.229 0.25 0.404 0.2 0.151 ...
    

People$name <- paste(People$nameFirst, People$nameLast, sep = " ")

batting_name <- merge(Batting,
                 People[,c("playerID", "name")],
                 by = "playerID", all.x = TRUE)

#Merges player name to Pitching data.

People$name <- paste(People$nameFirst, People$nameLast, sep = " ")

pitching_name <- merge(Pitching,
                 People[,c("playerID", "name")],
                 by = "playerID", all.x = TRUE)
#Creating additional stats for bstats
bstats[is.na(bstats)] = 0
#is.nan(bstats)

bstats <- bstats %>%
  mutate(K_Percent = SO / PA) %>%
  mutate(BB_Percent = (BB + IBB) / PA) %>%
  mutate_all(~replace(., is.nan(.), 0))
invalid factor level, NA generatedinvalid factor level, NA generated
bstats <- bstats %>%
  mutate_at(vars(K_Percent, BB_Percent), funs(round(., 3)))
`funs()` was deprecated in dplyr 0.8.0.
Please use a list of either functions or lambdas: 

  # Simple named list: 
  list(mean = mean, median = median)

  # Auto named with `tibble::lst()`: 
  tibble::lst(mean, median)

  # Using lambdas
  list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
bstats_salary <- bstats %>%
              filter(yearID >= 1985) %>%
              left_join(select(Salaries, playerID, yearID, teamID, salary), 
                         by=c("playerID", "yearID", "teamID"))

bstats_salary[is.na(bstats_salary)] = 0
str(bstats_salary)
'data.frame':   46535 obs. of  32 variables:
 $ playerID  : chr  "aasedo01" "abregjo01" "ackerji01" "adamsri02" ...
 $ yearID    : num  1985 1985 1985 1985 1985 ...
 $ stint     : num  1 1 1 1 1 1 1 1 1 1 ...
 $ teamID    : Factor w/ 149 levels "ALT","ANA","ARI",..: 5 35 134 117 33 102 94 134 134 134 ...
 $ lgID      : Factor w/ 7 levels "AA","AL","FL",..: 2 5 2 5 2 5 5 2 2 2 ...
 $ G         : num  54 6 61 54 54 91 22 12 36 14 ...
 $ AB        : num  0 9 0 121 0 165 36 20 0 34 ...
 $ R         : num  0 0 0 12 0 27 1 2 0 2 ...
 $ H         : num  0 0 0 23 0 46 10 4 0 4 ...
 $ X2B       : num  0 0 0 3 0 7 2 1 0 1 ...
 $ X3B       : num  0 0 0 1 0 3 0 0 0 0 ...
 $ HR        : num  0 0 0 2 0 6 0 1 0 0 ...
 $ RBI       : num  0 1 0 10 0 21 2 5 0 3 ...
 $ SB        : num  0 0 0 1 0 1 0 0 0 0 ...
 $ CS        : num  0 0 0 1 0 0 0 0 0 0 ...
 $ BB        : num  0 0 0 5 0 22 1 3 0 0 ...
 $ SO        : num  0 2 0 23 0 26 5 6 0 10 ...
 $ IBB       : num  0 0 0 3 0 5 0 0 0 0 ...
 $ HBP       : num  0 0 0 1 0 6 0 0 0 0 ...
 $ SH        : num  0 0 0 3 0 4 7 0 0 0 ...
 $ SF        : num  0 0 0 0 0 3 0 1 0 0 ...
 $ GIDP      : num  0 0 0 2 0 7 1 1 0 1 ...
 $ BA        : num  0 0 0 0.19 0 0.279 0.278 0.2 0 0.118 ...
 $ PA        : num  0 9 0 130 0 200 44 24 0 34 ...
 $ TB        : num  0 0 0 34 0 77 12 8 0 5 ...
 $ SlugPct   : num  0 0 0 0.281 0 0.467 0.333 0.4 0 0.147 ...
 $ OBP       : num  0 0 0 0.228 0 0.378 0.297 0.292 0 0.118 ...
 $ OPS       : num  0 0 0 0.509 0 0.845 0.63 0.692 0 0.265 ...
 $ BABIP     : num  0 0 0 0.219 0 0.294 0.323 0.214 0 0.167 ...
 $ K_Percent : num  0 0.222 0 0.177 0 0.13 0.114 0.25 0 0.294 ...
 $ BB_Percent: num  0 0 0 0.062 0 0.135 0.023 0.125 0 0 ...
 $ salary    : num  0 0 170000 0 147500 ...
bstats_sure <- bstats_salary %>%
  filter(PA > 150) %>%
  select(OPS, BABIP, K_Percent, BB_Percent, salary)

Data Preparation (Lesson 1 & 2)

#Keep players with over 150 at bats. (We can change this value if necessary).
#Creating batting average variable.

batting1 <- bstats %>%
  filter(AB >= 150)
  
bstats %>%
  filter(playerID == "bogaexa01")
<<<<<<< HEAD ======= >>>>>>> d16bcc7760e22b3826b7dca6af534c19cebd9366

Exploratory Analysis (Lesson 1 & 2)

Lessons 1 and 2 will just be parts of the overall project. Simple things like data manipulation, apply functions, boxplots, etc. This will be data preparation items and exploratory analysis.

b <- ggplot(batting1, aes(x = teamID, y = HR)) +
  geom_boxplot(col = "black", aes(fill = teamID))
b
<<<<<<< HEAD

=======

>>>>>>> d16bcc7760e22b3826b7dca6af534c19cebd9366
hitters1 <- batting1 %>%
  filter(yearID < 1895) %>%
  select(SlugPct)

hitters2 <- batting1 %>%
  filter(yearID > 1894, yearID < 1921) %>%
  select(SlugPct)

hitters3 <- batting1 %>%
  filter(yearID > 1920, yearID < 1969) %>%
  select(SlugPct)

hitters4 <- batting1 %>%
  filter(yearID > 1969) %>%
  select(SlugPct)
#Organizing 4 different datasets looking at slugging percentage for the following boxplots. All of these are somewhat different eras, with the most dramatic split being from before 1920 (pre-Babe Ruth) and after 1920 (during and post-Babe Ruth)
boxplot(hitters1,
        main = "Slugging percentage from late 1871 - 1894",
        ylab = "Slugging percentage",
        col = "blue",
        horizontal = TRUE)
<<<<<<< HEAD

=======

>>>>>>> d16bcc7760e22b3826b7dca6af534c19cebd9366
boxplot(hitters2, 
        main = "Slugging percentage from 1895-1920",
        ylab = "Slugging percentage",
        col = "yellow",
        horizontal = TRUE)
<<<<<<< HEAD

=======

>>>>>>> d16bcc7760e22b3826b7dca6af534c19cebd9366
boxplot(hitters3, 
        main = "Slugging percentage from 1921-1968",
        ylab = "Slugging percentage",
        col = "red",
        horizontal = TRUE)
<<<<<<< HEAD

=======

>>>>>>> d16bcc7760e22b3826b7dca6af534c19cebd9366
boxplot(hitters4, 
        main = "Slugging percentage from 1969 - present",
        ylab = "Slugging percentage",
        col = "red",
        horizontal = TRUE)
<<<<<<< HEAD

=======

>>>>>>> d16bcc7760e22b3826b7dca6af534c19cebd9366
sapply(hitters1, mean, na.rm = T)
  SlugPct 
0.3456088 
sapply(hitters2, mean, na.rm = T)
 SlugPct 
0.348923 
sapply(hitters3, mean, na.rm = T)
  SlugPct 
0.3972127 
sapply(hitters4, mean, na.rm = T)
  SlugPct 
0.4088045 
#Notice that gigantic increase between hitters2 and hitters3
summary(hitters1)
    SlugPct      
 Min.   :0.1220  
 1st Qu.:0.2900  
 Median :0.3380  
 Mean   :0.3456  
 3rd Qu.:0.3970  
 Max.   :0.6960  
summary(hitters2)
    SlugPct      
 Min.   :0.1480  
 1st Qu.:0.3003  
 Median :0.3430  
 Mean   :0.3489  
 3rd Qu.:0.3910  
 Max.   :0.8490  
summary(hitters3)
    SlugPct      
 Min.   :0.1760  
 1st Qu.:0.3420  
 Median :0.3900  
 Mean   :0.3972  
 3rd Qu.:0.4440  
 Max.   :0.8460  
summary(hitters4)
    SlugPct      
 Min.   :0.1730  
 1st Qu.:0.3540  
 Median :0.4040  
 Mean   :0.4088  
 3rd Qu.:0.4580  
 Max.   :0.8630  
#Keep batting stats that we want for pairs.
batting_num <- bstats %>%
  filter(PA >= 150) %>%
  select("BA", 'OBP', 'SlugPct', "SO", "BB", "HR")
  
pairs(batting_num)
<<<<<<< HEAD

=======

>>>>>>> d16bcc7760e22b3826b7dca6af534c19cebd9366

Career Batting Stats

careerBatting <- na.omit(bstats)
careerBatting <- careerBatting %>%
  select(playerID, BA, PA, SlugPct, OBP, SO, HR) %>%
  group_by(playerID) %>%
  summarise_all('mean')
careerBatting_num <- careerBatting %>%
  filter(PA >= 150) %>%
  select(BA, PA, SlugPct, OBP, SO, HR)

pairs(careerBatting_num)
<<<<<<< HEAD

=======

>>>>>>> d16bcc7760e22b3826b7dca6af534c19cebd9366
corrmatrix <- cor(batting_num)
corrplot(corrmatrix, method = 'number') #Gives us correlation from pairs graph.
<<<<<<< HEAD

=======

>>>>>>> d16bcc7760e22b3826b7dca6af534c19cebd9366
careerBatting_num1 <- careerBatting_num %>%
  filter(PA > 500)

0-dimensional Reduction (Lesson 4)

Bootstrapping

PCA (Lesson 4)

res <- batting_num %>% prcomp(scale = TRUE)
res
Standard deviations (1, .., p=6):
[1] 1.8624983 1.1955799 0.8163046 0.5272521 0.3234188 0.2296540

Rotation (n x k) = (6 x 6):
               PC1         PC2         PC3         PC4        PC5          PC6
BA      -0.3736490  0.53149382  0.20948811 -0.39409469  0.6134310  0.049063667
OBP     -0.4412694  0.38795844 -0.30295510 -0.06651166 -0.5817204  0.469217735
SlugPct -0.4816546  0.08527252  0.45916589  0.20230952 -0.3441137 -0.624948649
SO      -0.2974863 -0.61917967  0.04176753 -0.71554909 -0.1194610  0.009617743
BB      -0.4043725 -0.14520286 -0.75150469  0.19652707  0.2909420 -0.356888661
HR      -0.4262175 -0.39403532  0.29495049  0.49870136  0.2607132  0.509317820
loadings <- res$rotation
loadings
               PC1         PC2         PC3         PC4        PC5          PC6
BA      -0.3736490  0.53149382  0.20948811 -0.39409469  0.6134310  0.049063667
OBP     -0.4412694  0.38795844 -0.30295510 -0.06651166 -0.5817204  0.469217735
SlugPct -0.4816546  0.08527252  0.45916589  0.20230952 -0.3441137 -0.624948649
SO      -0.2974863 -0.61917967  0.04176753 -0.71554909 -0.1194610  0.009617743
BB      -0.4043725 -0.14520286 -0.75150469  0.19652707  0.2909420 -0.356888661
HR      -0.4262175 -0.39403532  0.29495049  0.49870136  0.2607132  0.509317820
score_mat <- res$x
score_mat
                   PC1           PC2           PC3           PC4           PC5           PC6
    [1,] -2.416723e+00  4.560698e+00  1.443027e+00 -7.333664e-01 -4.234072e-01 -1.762980e-01
    [2,]  1.219725e+00  1.849015e+00  8.201252e-01 -1.952915e-01  3.981469e-01  1.822513e-01
    [3,]  1.474218e+00  7.482123e-01  9.141729e-01  8.396478e-01 -2.439173e-01 -5.416897e-01
    [4,]  6.203888e-01  2.304795e+00  1.057469e+00 -2.640564e-01  2.662469e-01  3.072002e-02
    [5,]  2.943669e+00  3.660013e-01  5.506029e-01  4.155864e-01  5.317398e-01 -3.286813e-02
    [6,]  1.777087e+00  1.308355e+00  8.996221e-01  1.318660e-01  3.669270e-01 -1.200040e-01
    [7,]  1.638294e+00  1.365860e+00  9.687942e-01  2.460817e-01  2.518894e-01 -2.846689e-01
    [8,]  1.277859e+00  1.429171e+00  1.237701e+00  3.160011e-01  2.384888e-01 -3.187137e-01
    [9,]  2.575716e+00  7.738965e-01  5.169331e-01  3.002796e-01  5.345135e-01  1.057655e-01
   [10,]  1.406601e+00  1.626801e+00  1.019207e+00  6.878197e-02  3.105486e-01 -1.622986e-01
   [11,]  2.314598e+00  1.032260e+00  3.284764e-01  1.875309e-01  4.737664e-01  3.377658e-01
   [12,] -2.254698e+00  4.915125e+00  1.951355e+00 -1.215630e+00  1.700315e-01  1.075539e-01
   [13,]  1.714346e+00  1.191014e+00  1.054853e+00  3.211466e-01  2.940316e-01 -2.832409e-01
   [14,]  4.214521e+00 -5.515752e-01  3.050290e-01  7.087223e-01  7.976724e-01  3.980669e-02
   [15,]  2.271122e+00  1.078424e+00  5.187903e-01  1.175299e-01  5.660790e-01  2.481744e-01
   [16,]  7.655863e-01  2.041193e+00  1.014612e+00  1.844765e-02  5.616512e-02 -1.959685e-01
   [17,]  1.520407e+00  1.432379e+00  1.091449e+00  2.042299e-01  2.334932e-01 -3.524935e-01
   [18,]  1.490633e+00  1.291377e+00  6.930873e-01  3.470345e-01  6.483829e-02 -9.133176e-02
   [19,] -1.993183e-01  3.201744e+00  1.191853e+00 -5.892269e-01  4.491340e-01  4.036008e-01
   [20,]  3.770710e-01  2.434379e+00  7.232593e-01 -1.258865e-01  2.812814e-02  1.074989e-01
   [21,]  2.922852e-01  2.390471e+00  1.332478e+00 -8.020927e-02  1.240978e-01 -2.261130e-01
   [22,]  7.804409e-01  2.075181e+00  2.583384e-01 -8.693810e-02  3.394759e-03  4.109240e-01
   [23,]  1.371866e+00  9.825359e-01  9.688047e-01  7.256051e-01 -2.492550e-01 -5.812617e-01
   [24,] -2.382359e+00  4.714902e+00  1.062824e+00 -1.046896e+00 -3.540856e-02  3.003221e-01
   [25,]  1.292267e+00  2.096809e+00  6.511071e-01 -4.066068e-01  6.834969e-01  5.933871e-01
   [26,] -2.777582e+00  4.944257e+00  1.755964e+00 -1.083355e+00 -5.785753e-02  1.077638e-02
   [27,]  1.190319e+00  1.795224e+00  7.511231e-01 -9.836268e-02  3.912924e-01  6.458790e-02
   [28,]  2.138132e+00  7.503777e-01  5.815927e-01  2.363811e-01  3.601793e-01  5.370821e-02
   [29,]  2.204313e+00  9.435450e-01  5.069415e-01  1.122817e-01  4.312682e-01  1.689245e-01
   [30,]  2.534510e+00  9.701706e-01  6.262459e-01  7.652271e-02  7.558703e-01  2.656094e-01
   [31,]  1.554444e+00  1.436907e+00  1.008768e+00  9.023136e-02  5.609106e-01  1.073257e-01
   [32,]  1.819823e+00  1.362150e+00  6.227748e-01  4.177417e-02  4.474395e-01  1.647032e-01
   [33,]  3.601595e+00 -6.392722e-01  4.603145e-01  4.955963e-01  5.099287e-01 -2.749128e-01
   [34,]  2.645202e-01  2.481165e+00  9.207404e-01 -6.058332e-01  4.796779e-01  3.919094e-01
   [35,]  3.421297e+00  1.147581e-02  4.038722e-01  5.460176e-01  6.062009e-01  1.169545e-02
   [36,]  2.471482e-01  2.481274e+00  8.898948e-01 -5.414952e-01  3.639352e-01  1.736046e-01
   [37,]  2.150533e+00  1.295666e+00  5.544389e-01  1.153722e-02  6.516006e-01  3.546918e-01
   [38,]  3.006876e+00  8.594070e-03  8.192747e-01  5.894401e-01  4.886067e-01 -3.422591e-01
   [39,]  1.854183e-02  2.884613e+00  1.174663e+00 -5.676400e-01  3.734326e-01  1.501689e-01
   [40,]  1.393717e+00  1.754510e+00  7.957141e-01 -1.746703e-01  5.928169e-01  3.670016e-01
   [41,]  2.260195e+00  6.986164e-01  1.076252e+00  4.301822e-01  3.090228e-01 -5.521915e-01
   [42,]  5.987150e-02  2.642965e+00  1.483975e+00 -2.125679e-01  2.386474e-01 -2.420331e-01
   [43,]  4.316153e-01  2.249363e+00  8.628850e-01 -2.927017e-01  3.697471e-01  1.355797e-01
   [44,]  1.114305e+00  1.925993e+00  7.971480e-01 -1.876282e-01  4.587106e-01  1.283862e-01
   [45,]  1.854586e-01  2.651860e+00  1.294539e+00 -4.079149e-01  5.125728e-01  2.486702e-01
   [46,]  1.893873e-01  2.772122e+00  1.357466e+00 -4.803023e-01  5.973779e-01  3.360247e-01
   [47,]  7.292379e-01  1.737208e+00 -2.609344e-01 -9.363625e-02 -4.368283e-02  4.379741e-01
   [48,]  1.847550e+00  1.276145e+00  7.167369e-01 -2.892983e-02  5.097654e-01  1.042807e-01
   [49,]  3.626601e+00 -8.547987e-02  2.693330e-01  4.713822e-01  7.559704e-01  1.828194e-01
   [50,]  1.930785e+00  1.394545e+00  6.893944e-01 -6.183811e-02  6.532000e-01  2.401690e-01
   [51,] -6.078538e-02  3.176777e+00  1.211221e+00 -6.759564e-01  4.986540e-01  3.306064e-01
   [52,]  2.339012e+00  6.987610e-01  1.334819e-01  2.047737e-01  4.287504e-01  2.192671e-01
   [53,]  9.842678e-01  2.225258e+00  1.096110e+00 -3.051193e-01  5.516512e-01  1.454291e-01
   [54,]  1.455407e+00  1.672343e+00  9.417622e-01 -6.577560e-02  4.895289e-01 -1.238487e-03
   [55,]  4.300761e+00 -9.084043e-01  2.143078e-01  9.553783e-01  6.595707e-01 -6.440636e-02
   [56,]  4.710233e-01  1.423581e+00  1.662245e+00  4.631328e-01  2.677690e-01 -3.133157e-01
   [57,]  1.649192e+00  1.423337e+00  1.162990e+00  7.234994e-02  5.394208e-01 -1.026508e-01
   [58,]  2.768852e+00  1.478174e-01  8.899297e-01  5.451073e-01  3.915213e-01 -3.815478e-01
   [59,]  2.099573e+00  1.023482e+00  9.221362e-01  1.216150e-01  6.103257e-01  1.984430e-02
   [60,]  1.569637e+00  1.275754e+00  1.234741e+00 -7.314692e-03  4.644239e-01 -2.085609e-01
   [61,] -1.498821e-01  3.080767e+00  1.334832e+00 -5.264526e-01  3.427686e-01  4.359203e-02
   [62,]  2.568969e+00  9.856887e-01  6.948187e-01  8.865470e-02  7.938021e-01  2.354448e-01
   [63,]  1.673655e+00  9.690939e-01  8.796657e-01  1.193615e-01  4.153881e-01 -3.410592e-02
   [64,] -4.853341e-02  2.629401e+00  1.554874e+00 -1.519071e-01  2.415414e-01 -2.272155e-01
   [65,]  2.410371e+00  7.924908e-01  2.061873e-01  2.658986e-01  4.473866e-01  2.400031e-01
   [66,]  2.280413e+00  9.146890e-01  8.091815e-01  2.796219e-01  4.894963e-01 -1.584045e-01
   [67,] -2.505357e-01  3.082187e+00  1.431683e+00 -4.758327e-01  3.200074e-01  8.442382e-02
   [68,] -1.108737e+00  4.083915e+00  1.210488e+00 -1.059773e+00  4.674161e-01  5.476804e-01
   [69,]  2.146383e+00  1.108095e+00  5.400660e-01  1.272634e-01  6.767289e-01  3.062165e-01
   [70,] -3.332913e+00  5.029997e+00  1.529292e+00 -8.384134e-01 -1.736625e-01 -2.189377e-01
   [71,]  1.917417e+00  1.118149e+00  1.174363e+00  2.949981e-01  3.501859e-01 -4.599910e-01
   [72,]  2.510081e+00  4.956159e-01  4.078296e-01  5.372300e-01  4.147258e-01 -5.399929e-02
   [73,]  1.842099e+00  1.296739e+00  6.460042e-01 -8.950588e-02  5.084560e-01  2.165966e-01
   [74,]  1.946117e+00  1.088249e+00  9.342168e-01  1.443770e-01  5.197305e-01 -4.137615e-02
   [75,]  4.010638e+00 -9.043144e-01 -9.277337e-02  5.143939e-01  5.910971e-01  1.226830e-01
   [76,]  2.317837e+00  5.746190e-01  5.556606e-01  4.191115e-01  4.862667e-01  5.763530e-02
   [77,]  3.074563e-01  2.534392e+00  1.401686e+00 -3.824038e-01  4.268553e-01 -1.232876e-03
   [78,]  1.167005e+00  1.809348e+00  1.129422e+00 -4.721470e-02  4.501069e-01 -2.529551e-02
   [79,]  1.632560e+00  1.468100e+00  9.918127e-01  1.531525e-03  4.588732e-01 -9.019922e-02
   [80,]  2.741739e+00  5.302697e-01  3.604595e-01  2.144569e-01  5.750893e-01  2.100966e-01
   [81,]  1.971533e+00  1.017793e+00  9.299388e-01  1.804371e-01  6.125357e-01  6.301301e-02
   [82,]  2.721711e+00  2.680274e-01  2.327942e-02  2.907707e-01  3.877542e-01  2.068869e-01
   [83,]  2.135547e-01  2.706228e+00  1.040692e+00 -4.148453e-01  3.578101e-01  1.113258e-01
   [84,]  2.376955e+00  6.755834e-01  6.517979e-01  1.401077e-01  4.918054e-01 -3.603275e-02
   [85,]  2.360438e+00  7.127484e-01  6.694583e-01  2.299599e-01  5.813277e-01  5.283155e-02
   [86,]  4.128479e-02  2.776277e+00  1.415481e+00 -3.831971e-01  3.556296e-01 -7.593776e-03
   [87,] -4.389638e-01  3.392092e+00  8.501625e-01 -7.063879e-01  3.920052e-01  4.741925e-01
   [88,]  1.770532e+00  1.189871e+00  1.037435e+00  1.423073e-01  4.706187e-01 -1.423005e-01
   [89,]  1.930187e+00  9.620627e-01  7.382488e-01  2.714791e-01  3.795639e-01 -9.154777e-02
   [90,]  2.155863e+00  9.065004e-01  8.945369e-01  3.965488e-01  4.249762e-01 -1.818956e-01
   [91,]  1.523907e-01  2.862417e+00  1.276905e+00 -4.713282e-01  3.751188e-01  9.063007e-02
   [92,]  9.995124e-01  1.823088e+00  1.170911e+00  1.344113e-02  4.476761e-01 -1.903802e-02
   [93,]  1.110998e+00  2.110401e+00  9.938894e-01 -2.666306e-01  5.675575e-01  1.759913e-01
   [94,]  5.192421e-01  2.435532e+00  1.302713e+00 -3.091739e-01  4.550121e-01  7.071398e-02
   [95,]  7.936573e-01  2.224945e+00  1.313504e+00 -3.269706e-01  4.517846e-01 -6.770236e-02
   [96,]  8.403641e-01  2.256260e+00  1.178937e+00 -2.087666e-01  4.417500e-01 -3.706420e-02
   [97,]  1.148718e+00  1.669228e+00  2.253819e-02 -2.459589e-01  2.848365e-01  5.532400e-01
   [98,]  1.185487e+00  1.572026e+00  3.058708e-01 -8.272972e-02  2.987945e-01  1.777574e-01
   [99,]  2.380042e+00  6.606109e-01  7.417901e-01 -1.160135e-01  6.060816e-01  9.513828e-02
  [100,]  3.908648e+00 -6.824891e-01 -8.243134e-02  8.576197e-01  4.371080e-01 -5.428605e-02
  [101,]  2.174089e+00  1.264951e+00  6.906613e-01  2.511012e-02  7.083800e-01  2.293995e-01
  [102,]  1.618655e+00  1.738117e+00  8.890477e-01 -1.790972e-01  6.765123e-01  2.320327e-01
  [103,]  1.964422e+00  1.146938e+00  4.015434e-01  6.802530e-02  4.616389e-01  2.143541e-01
  [104,] -1.042642e+00  3.631385e+00  1.599374e+00 -6.467583e-01  2.788696e-01  1.557212e-01
  [105,] -3.249017e-01  2.867747e+00  1.670345e+00 -2.033250e-01  3.077568e-01 -1.065481e-01
  [106,]  1.571414e-01  2.589093e+00  1.581907e+00 -1.917588e-01  1.676164e-01 -4.610063e-01
  [107,]  3.075156e+00 -2.184416e-01  1.588215e-01  3.996932e-01  3.985028e-01  1.536229e-01
  [108,]  9.464263e-01  2.334568e+00  9.881629e-01 -4.426485e-01  6.270796e-01  3.568946e-01
  [109,] -6.112114e-01  3.051586e+00  8.299057e-01 -3.936699e-01  1.653944e-01  1.061102e-01
  [110,] -2.891211e-01  3.207652e+00  1.054573e+00 -6.829069e-01  3.826751e-01  3.203025e-01
  [111,]  1.958538e+00  1.147893e+00  4.340068e-01  1.390249e-01  5.734960e-01  2.825576e-01
  [112,]  8.340147e-02  2.055687e+00  1.394886e+00  1.639048e-01  1.670183e-01 -3.031638e-01
  [113,]  1.948431e+00  1.402737e+00  5.745996e-01 -8.495996e-02  6.732018e-01  3.401546e-01
  [114,]  1.331906e+00  1.598962e+00  1.169111e+00  7.239512e-02  4.155062e-01 -1.724547e-01
  [115,]  2.089271e+00  8.270733e-01  9.166172e-01  3.041473e-01  5.516338e-01 -6.825203e-02
  [116,]  6.652700e-01  2.346548e+00  1.092434e+00 -3.183514e-01  5.451402e-01  2.086985e-01
  [117,]  2.182842e+00  9.434587e-01  7.752312e-01  3.309184e-01  5.282321e-01 -5.022710e-02
  [118,]  5.013390e-01  2.533947e+00  1.210166e+00 -3.879506e-01  4.280099e-01  5.845775e-02
  [119,]  2.398461e+00  5.670894e-01  6.248191e-01  2.635547e-01  5.100397e-01  1.409121e-03
  [120,] -1.188190e+00  3.900769e+00  1.819585e+00 -8.126923e-01  3.070541e-01  4.488749e-02
  [121,]  2.227782e+00  1.044966e+00  9.926182e-01  1.859686e-01  5.704792e-01 -1.576408e-01
  [122,]  6.265958e-01  2.374716e+00  7.351219e-01 -2.704773e-01  3.419254e-01  2.442308e-01
  [123,] -1.527206e+00  3.792398e+00  1.498283e+00 -5.826627e-01  2.284069e-01  1.143657e-01
  [124,]  1.949358e+00  7.736311e-01  4.591902e-01  4.113739e-01  3.819606e-01  2.007939e-02
  [125,]  9.546985e-01  1.712587e+00  1.279208e+00  6.244127e-02  3.509531e-01 -2.032127e-01
  [126,]  4.175919e+00 -6.018102e-01  3.044794e-01  5.929268e-01  8.285393e-01  6.653842e-02
  [127,]  3.073146e+00  2.745347e-01  7.352831e-01  4.814719e-01  5.952238e-01 -1.992386e-01
  [128,]  1.774820e+00  1.289509e+00  6.669919e-01 -1.641910e-02  5.258865e-01  6.206168e-02
  [129,]  5.375029e-01  2.595559e+00  9.792746e-01 -4.384009e-01  5.128877e-01  2.760494e-01
  [130,]  1.598670e+00  1.663336e+00  8.385291e-01 -1.327358e-01  5.606892e-01  2.006034e-01
  [131,]  8.938875e-02  2.782653e+00  9.391116e-01 -4.292095e-01  3.250307e-01  1.562155e-01
  [132,]  4.491808e+00 -1.287511e+00  3.988355e-01  6.978455e-01  6.750664e-01 -2.784215e-01
  [133,]  3.277880e+00 -6.584339e-02  6.927470e-01  4.305486e-01  6.061131e-01 -2.223229e-01
  [134,]  1.638951e+00  1.263582e+00  1.160531e+00  2.927038e-01  3.091054e-01 -3.312571e-01
  [135,]  3.296838e+00  2.304245e-01  3.456176e-01  2.485582e-01  8.001902e-01  3.026274e-01
  [136,]  3.596579e+00 -2.680050e-01  5.883358e-01  5.459397e-01  6.462194e-01 -1.850761e-01
  [137,]  2.454556e+00  6.178760e-01  7.683971e-01  3.774372e-01  4.885282e-01 -9.600194e-02
  [138,]  3.253124e+00  2.323717e-01  4.309048e-01  3.185377e-01  7.544745e-01  1.730422e-01
  [139,]  1.970733e+00  1.018884e+00  1.053743e+00  1.557218e-01  5.270654e-01 -1.585225e-01
  [140,]  1.732010e+00  1.305955e+00  9.698273e-01  4.194498e-02  5.546218e-01  9.694220e-03
  [141,]  2.225008e+00  7.309146e-01  6.902748e-01  3.084738e-02  4.856930e-01 -3.255164e-02
  [142,]  1.097909e+00  1.474785e+00  1.536971e+00  3.588217e-01  2.872577e-01 -4.562505e-01
  [143,] -2.784413e-01  2.874435e+00  1.589095e+00 -2.842018e-01  7.354918e-02 -4.702583e-01
  [144,]  3.543882e+00 -3.235219e-01  6.006674e-01  3.081664e-01  7.149754e-01 -8.164553e-02
  [145,]  2.129066e+00  8.346168e-01  6.714373e-01  2.156418e-01  6.212377e-01  1.639034e-01
  [146,]  2.778449e+00  4.690608e-01  5.346695e-01  3.530826e-01  5.238112e-01 -1.691195e-02
  [147,]  3.318391e+00 -4.019898e-02  1.329822e-01  4.554767e-01  5.679152e-01  1.507273e-01
  [148,]  1.961713e+00  1.228795e+00  6.940204e-01 -2.294416e-01  6.582778e-01  2.966505e-01
  [149,]  6.883506e-01  2.237150e+00  1.111198e+00 -1.600857e-01  3.408241e-01 -1.358345e-01
  [150,]  2.674405e+00  6.525884e-01  5.475495e-01  3.712414e-02  7.278302e-01  2.354851e-01
  [151,]  3.556008e+00 -2.199490e-01  4.501220e-01  4.201832e-01  7.066383e-01 -9.600422e-03
  [152,]  7.393203e-01  2.392853e+00  1.287022e+00 -3.290517e-01  4.924273e-01  1.329802e-02
  [153,]  1.280523e+00  2.026696e+00  8.107106e-01 -2.868386e-01  6.492289e-01  3.371576e-01
  [154,]  2.170348e+00  1.131354e+00  6.813562e-01 -6.638855e-02  6.787592e-01  2.330541e-01
  [155,]  1.849284e+00  9.991822e-01  5.051238e-01 -5.257306e-02  4.897946e-01  2.653435e-01
  [156,]  1.870168e+00  1.316393e+00  6.006543e-01 -4.324355e-02  5.540260e-01  2.129298e-01
  [157,]  1.371166e+00  1.486129e+00  1.384976e+00  2.654090e-01  3.073133e-01 -4.855113e-01
  [158,]  7.454876e-01  2.311675e+00  9.709886e-01 -3.586801e-01  4.676539e-01  1.803653e-01
  [159,]  3.140966e+00 -1.743634e-01  3.273602e-01  3.639460e-01  5.143199e-01 -9.705433e-02
  [160,]  2.111663e+00  1.092447e+00  5.673255e-01  4.419451e-02  5.741471e-01  1.651516e-01
  [161,]  2.319455e+00  8.086177e-01  7.777198e-01  3.865898e-01  5.497539e-01 -1.071935e-01
  [162,]  1.545696e+00  1.581049e+00  7.992864e-01 -1.243311e-01  5.378649e-01  1.187910e-01
  [163,]  3.098880e-01  2.733051e+00  1.364830e+00 -3.830859e-01  4.013289e-01 -2.839183e-02
  [164,]  1.053480e+00  2.035085e+00  1.117748e+00 -2.013981e-01  4.824652e-01 -2.245388e-02
  [165,]  3.807886e+00 -5.048465e-01  5.647328e-01  7.574360e-01  5.701567e-01 -3.459630e-01
  [166,]  2.855763e+00  5.998251e-01  4.149993e-01  3.069515e-01  6.704102e-01  1.883833e-01
 [ reached getOption("max.print") -- omitted 35229 rows ]
get_eig(res)
<<<<<<< HEAD ======= >>>>>>> d16bcc7760e22b3826b7dca6af534c19cebd9366

Screeplot

get_eig(res) %>%
  ggplot(aes(x = 1:6, y = cumulative.variance.percent)) +
  geom_line() +
  geom_point() +
  geom_hline(yintercept = 80) +
  xlab("Principal Component") +
  ylab("Proportion of Variance Explained") +
  ggtitle("Scree Plot of Principal Component for Batting Statistics")
<<<<<<< HEAD

=======

>>>>>>> d16bcc7760e22b3826b7dca6af534c19cebd9366

2 Principal Components: PC1 and PC2

fviz_screeplot(res, main = "Scree Plot")
<<<<<<< HEAD

=======

>>>>>>> d16bcc7760e22b3826b7dca6af534c19cebd9366

Can Identify an elbow in 3.

Biplot

res %>%
  fviz_pca_var(axes = c(1,2),
               col.var = "contrib",
               gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
               repel = TRUE
               )
<<<<<<< HEAD

=======

>>>>>>> d16bcc7760e22b3826b7dca6af534c19cebd9366

Cluster Analysis (Lesson 5)

#NOT COMPLETE!!!!! This was just a test, bstats is way too big.
bstats_best <- bstats %>%
  filter(PA >= 600)

eu_dist <- get_dist(careerBatting_num1, method = 'euclidean')
hc_complete <- hclust(eu_dist, method = 'complete')

plot(hc_complete)
<<<<<<< HEAD

=======

>>>>>>> d16bcc7760e22b3826b7dca6af534c19cebd9366

Silhouette

res_test <- careerBatting_num1 %>% kmeans(7)
  str(res_test)
List of 9
 $ cluster     : int [1:313] 2 7 6 4 4 4 5 3 2 2 ...
 $ centers     : num [1:7, 1:6] 0.292 0.284 0.272 0.285 0.283 ...
  ..- attr(*, "dimnames")=List of 2
  .. ..$ : chr [1:7] "1" "2" "3" "4" ...
  .. ..$ : chr [1:6] "BA" "PA" "SlugPct" "OBP" ...
 $ totss       : num 651407
 $ withinss    : num [1:7] 21443 19571 15262 11195 17446 ...
 $ tot.withinss: num 115126
 $ betweenss   : num 536280
 $ size        : int [1:7] 45 29 57 39 80 24 39
 $ iter        : int 3
 $ ifault      : int 0
 - attr(*, "class")= chr "kmeans"
distance <- get_dist(careerBatting_num1, method = "euclidean")
sil <- silhouette(x = res_test$cluster, dist = distance)
summary(sil)
<<<<<<< HEAD
Silhouette of 313 units in 7 clusters from silhouette.default(x = res_test$cluster, dist = distance) :
 Cluster sizes and average silhouette widths:
       50       102        44        36        33        21        27 
0.3520849 0.4329740 0.3000351 0.2682495 0.3614668 0.4120772 0.3306556 
=======

Silhouette of 313 units in 7 clusters from silhouette.default(x = res_test$cluster, dist = distance) :
 Cluster sizes and average silhouette widths:
       45        29        57        39        80        24        39 
0.2661830 0.3229251 0.2975332 0.3243937 0.4329092 0.2847033 0.2636850 
>>>>>>> d16bcc7760e22b3826b7dca6af534c19cebd9366
Individual silhouette widths:
    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
-0.05233  0.16571  0.35355  0.32812  0.48573  0.62032 
sil %>% head()
     cluster neighbor sil_width
[1,]       2        4 0.4003502
[2,]       7        3 0.1667519
[3,]       6        2 0.3253659
[4,]       4        3 0.3780880
[5,]       4        6 0.4693650
[6,]       4        3 0.1784711
fviz_silhouette(sil)
<<<<<<< HEAD ======= >>>>>>> d16bcc7760e22b3826b7dca6af534c19cebd9366
<<<<<<< HEAD

=======

>>>>>>> d16bcc7760e22b3826b7dca6af534c19cebd9366
fviz_nbclust(careerBatting_num1, hcut, hc_method = "complete", hc_metric = "euclidean", method = "wss")
<<<<<<< HEAD

=======

>>>>>>> d16bcc7760e22b3826b7dca6af534c19cebd9366
##This is to test other values of K for the silhouette method.
res_test1 <- careerBatting_num1 %>% kmeans(10 )
  str(res_test1)
List of 9
 $ cluster     : int [1:313] 10 4 6 3 3 3 8 2 6 10 ...
 $ centers     : num [1:10, 1:6] 0.287 0.278 0.285 0.273 0.269 ...
  ..- attr(*, "dimnames")=List of 2
  .. ..$ : chr [1:10] "1" "2" "3" "4" ...
  .. ..$ : chr [1:6] "BA" "PA" "SlugPct" "OBP" ...
 $ totss       : num 651407
 $ withinss    : num [1:10] 5561 3421 7533 8158 17850 ...
 $ tot.withinss: num 82705
 $ betweenss   : num 568701
 $ size        : int [1:10] 47 26 32 35 31 29 28 40 25 20
 $ iter        : int 4
 $ ifault      : int 0
 - attr(*, "class")= chr "kmeans"
distance <- get_dist(careerBatting_num1, method="euclidean")
sil <- silhouette(x = res_test1$cluster, dist = distance)
summary(sil)
Silhouette of 313 units in 10 clusters from silhouette.default(x = res_test1$cluster, dist = distance) :
 Cluster sizes and average silhouette widths:
       47        26        32        35        31        29        28        40        25        20 
0.4123528 0.2235291 0.3095730 0.3662893 0.2348064 0.2444163 0.3084443 0.2360885 0.4169711 0.3504059 
Individual silhouette widths:
    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
-0.07461  0.18155  0.32364  0.31245  0.44921  0.62394 
sil %>% head()
     cluster neighbor  sil_width
[1,]      10        6 0.25143884
[2,]       4        8 0.56107068
[3,]       6       10 0.21097598
[4,]       3        2 0.37769870
[5,]       3        6 0.09750601
[6,]       3        4 0.41195414
fviz_silhouette(sil)
<<<<<<< HEAD ======= >>>>>>> d16bcc7760e22b3826b7dca6af534c19cebd9366
<<<<<<< HEAD

=======

>>>>>>> d16bcc7760e22b3826b7dca6af534c19cebd9366

Diana

Linear Regression (Lesson 6)

Linear Regression comparing team payroll and win rate.

teams = as.data.table(Teams)
teams = teams[, .(yearID,
                  lgID = as.character(lgID),
                  teamID = as.character(teamID),
                  franchID = as.character(franchID),
                  Rank, G, W, L, R, ERA, SO,
                  WinPercent = W/(W+L))]

salaries = as.data.table(Salaries)
salaries = salaries[, c("lgID", "teamID", "salary1M") :=
                      list(as.character(lgID), as.character(teamID), salary / 1e6L)]
payroll = salaries[, .(payroll = sum(salary1M)), by=.(teamID, yearID)]
teamPayroll = merge(teams, payroll, by = c("teamID", "yearID"))
ggplot(data = teamPayroll, aes(x = payroll, y = WinPercent)) + geom_point()  + labs(x = "Payroll (in millions)", y = "Win Percentage") +
  geom_smooth(method = lm, se = FALSE)
<<<<<<< HEAD

=======

>>>>>>> d16bcc7760e22b3826b7dca6af534c19cebd9366
mod_lm <- lm(data = teamPayroll, WinPercent~payroll)
mod_lm

Call:
lm(formula = WinPercent ~ payroll, data = teamPayroll)

Coefficients:
(Intercept)      payroll  
  0.4796007    0.0003396  
summary(mod_lm)

Call:
lm(formula = WinPercent ~ payroll, data = teamPayroll)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.230866 -0.048237 -0.000954  0.049584  0.211074 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) 0.4796007  0.0037895 126.561  < 2e-16 ***
payroll     0.0003396  0.0000512   6.633 5.61e-11 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.06714 on 916 degrees of freedom
Multiple R-squared:  0.04583,   Adjusted R-squared:  0.04479 
F-statistic:    44 on 1 and 916 DF,  p-value: 5.611e-11
payroll_pred <- teamPayroll %>%
  add_predictions(mod_lm)

payroll_pred %>%
  filter(yearID >= 2010) %>%
  arrange(desc(pred)) %>%
  head(25)
<<<<<<< HEAD ======= >>>>>>> d16bcc7760e22b3826b7dca6af534c19cebd9366
payroll_pred %>%
  filter(yearID >= 2010) %>%
  arrange(desc(WinPercent)) %>%
  head(25)
<<<<<<< HEAD ======= >>>>>>> d16bcc7760e22b3826b7dca6af534c19cebd9366

Only five teams are in the top 25 of both payroll and win percentage in the 2010s. These teams are the 2011 Phillies, 2011 Yankees, 2010 Yankees, 2012 Yankees, and 2016 Rangers. This shows that spending the most money doesn’t automatically mean you are getting the best product on the field. ## Simple Linear Regression

Multiple Linear Regression

bstats_salary <- bstats_salary %>%
  filter(PA >= 100) %>%
  filter(salary > 500000)
lm_mod <- lm(salary ~ H, HR, data = bstats_salary)
summary(lm_mod)
<<<<<<< HEAD ======= >>>>>>> b3137a96c1d92749797373aaec5b835d16dc9c5d

Call:
lm(formula = salary ~ H, data = bstats_salary, subset = HR)

Residuals:
    Min      1Q  Median      3Q     Max 
<<<<<<< HEAD
-356091 -247912  -83020  131515 1616624 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) 283807.21    4454.23   63.72   <2e-16 ***
H              821.41      49.58   16.57   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 310200 on 14725 degrees of freedom
Multiple R-squared:  0.0183,    Adjusted R-squared:  0.01823 
F-statistic: 274.5 on 1 and 14725 DF,  p-value: < 2.2e-16
======= -358510 -224975 -68095 79439 1124078 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 814969.19 8653.86 94.174 <2e-16 *** H 573.87 64.96 8.834 <2e-16 *** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 314600 on 6957 degrees of freedom Multiple R-squared: 0.01109, Adjusted R-squared: 0.01095 F-statistic: 78.04 on 1 and 6957 DF, p-value: < 2.2e-16
>>>>>>> b3137a96c1d92749797373aaec5b835d16dc9c5d
lm_mod_prd <- bstats_salary %>% add_predictions(lm_mod)
lm_mod_prd
<<<<<<< HEAD
======= <<<<<<< HEAD ======= >>>>>>> d16bcc7760e22b3826b7dca6af534c19cebd9366
full_model <- lm(salary ~., data = bstats_sure)
summary(full_model)

Call:
lm(formula = salary ~ ., data = bstats_sure)

Residuals:
     Min       1Q   Median       3Q      Max 
-6914779 -1878645 -1019496   403743 29613794 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)  -1794883     277282  -6.473 9.93e-11 ***
OPS           9325457     399105  23.366  < 2e-16 ***
BABIP       -10628363    1053976 -10.084  < 2e-16 ***
K_Percent    -3344230     512360  -6.527 6.95e-11 ***
BB_Percent    7390060     977602   7.559 4.31e-14 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 3549000 on 13381 degrees of freedom
Multiple R-squared:  0.08167,   Adjusted R-squared:  0.0814 
F-statistic: 297.5 on 4 and 13381 DF,  p-value: < 2.2e-16
full_model_pred <- bstats_sure %>% add_predictions(full_model)
full_model_pred
<<<<<<< HEAD ======= >>>>>>> d16bcc7760e22b3826b7dca6af534c19cebd9366
adv_stat_mod <- lm(salary ~ OPS, data = bstats_salary)
summary(adv_stat_mod)

Call:
lm(formula = salary ~ OPS, data = bstats_salary)

Residuals:
     Min       1Q   Median       3Q      Max 
-7024728 -2574846 -1268208  1064627 28064361 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) -4206268     319409  -13.17   <2e-16 ***
OPS         10793278     419340   25.74   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 4184000 on 7207 degrees of freedom
Multiple R-squared:  0.08418,   Adjusted R-squared:  0.08406 
F-statistic: 662.5 on 1 and 7207 DF,  p-value: < 2.2e-16
>>>>>>> b3137a96c1d92749797373aaec5b835d16dc9c5d

Resampling Methods

Feature Selection

Salary Data

franchise <- c(`ANA` = "LAA", `ARI` = "ARI", `ATL` = "ATL", 
               `BAL` = "BAL", `BOS` = "BOS", `CAL` = "LAA",
               `CHA` = "CHA", `CHN` = "CHN", `CIN` = "CIN", 
               `CLE` = "CLE", `COL` = "COL", `DET` = "DET", 
               `FLO` = "MIA", `HOU` = "HOU", `KCA` = "KCA", 
               `LAA` = "LAA", `LAN` = "LAN", `MIA` = "MIA", 
               `MIL` = "MIL", `MIN` = "MIN", `ML4` = "MIL", 
               `MON` = "WAS", `NYA` = "NYA", `NYM` = "NYN", 
               `NYN` = "NYN", `OAK` = "OAK", `PHI` = "PHI", 
               `PIT` = "PIT", `SDN` = "SDN", `SEA` = "SEA",
               `SFG` = "SFN", `SFN` = "SFN", `SLN` = "SLN", 
               `TBA` = "TBA", `TEX` = "TEX", `TOR` = "TOR",
               `WAS` = "WAS")
Salaries$franchise <- unname(franchise[Salaries$teamID])
avg_team_salaries <- Salaries %>%
    group_by(yearID, franchise, lgID) %>%
    summarise(salary = mean(salary)/1e6) %>%
    filter(!(franchise == "CLE" & lgID == "NL"))
`summarise()` has grouped output by 'yearID', 'franchise'. You can override using the `.groups` argument.
ggplot(avg_team_salaries, 
       aes(x = yearID, y = salary, group = factor(franchise))) +
       geom_path() +
       labs(x = "Year", y = "Average team salary (millions USD)")
<<<<<<< HEAD

======= <<<<<<< HEAD

=======

>>>>>>> d16bcc7760e22b3826b7dca6af534c19cebd9366 >>>>>>> b3137a96c1d92749797373aaec5b835d16dc9c5d
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCmVkaXRvcl9vcHRpb25zOiAKICBjaHVua19vdXRwdXRfdHlwZTogaW5saW5lCi0tLQoKYGBge3J9CnJtKGxpc3QgPSBscygpKQoKbGlicmFyeShMYWhtYW4pCmxpYnJhcnkobW9zYWljKQpsaWJyYXJ5KHRpZHlyKQpsaWJyYXJ5KHRpZHl2ZXJzZSkKbGlicmFyeShkcGx5cikKbGlicmFyeShtcGxvdCkKbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KGNsdXN0ZXIpCmxpYnJhcnkoZmFjdG9leHRyYSkKbGlicmFyeShjb3JycGxvdCkKbGlicmFyeShkYXRhLnRhYmxlKQpsaWJyYXJ5KG1vZCkKbGlicmFyeShtb2RlbHIpCmBgYAoKYGBge3J9CiNMb2FkIGluIFBlb3BsZSwgQmF0dGluZywgYW5kIFBpdGNoaW5nIERhdGFmcmFtZXMKZGF0YSgiUGVvcGxlIikgCmRhdGEoIkJhdHRpbmciKQpkYXRhKCJQaXRjaGluZyIpCmBgYAoKYGBge3J9CiNNZXJnZXMgcGxheWVyIG5hbWUgdG8gQmF0dGluZyBkYXRhLiAKYnN0YXRzIDwtIGJhdHRpbmdTdGF0cygpCglzdHIoYnN0YXRzKQoJCgpQZW9wbGUkbmFtZSA8LSBwYXN0ZShQZW9wbGUkbmFtZUZpcnN0LCBQZW9wbGUkbmFtZUxhc3QsIHNlcCA9ICIgIikKCmJhdHRpbmdfbmFtZSA8LSBtZXJnZShCYXR0aW5nLAogICAgICAgICAgICAgICAgIFBlb3BsZVssYygicGxheWVySUQiLCAibmFtZSIpXSwKICAgICAgICAgICAgICAgICBieSA9ICJwbGF5ZXJJRCIsIGFsbC54ID0gVFJVRSkKCiNNZXJnZXMgcGxheWVyIG5hbWUgdG8gUGl0Y2hpbmcgZGF0YS4KClBlb3BsZSRuYW1lIDwtIHBhc3RlKFBlb3BsZSRuYW1lRmlyc3QsIFBlb3BsZSRuYW1lTGFzdCwgc2VwID0gIiAiKQoKcGl0Y2hpbmdfbmFtZSA8LSBtZXJnZShQaXRjaGluZywKICAgICAgICAgICAgICAgICBQZW9wbGVbLGMoInBsYXllcklEIiwgIm5hbWUiKV0sCiAgICAgICAgICAgICAgICAgYnkgPSAicGxheWVySUQiLCBhbGwueCA9IFRSVUUpCmBgYAoKYGBge3J9CiNDcmVhdGluZyBhZGRpdGlvbmFsIHN0YXRzIGZvciBic3RhdHMKYnN0YXRzW2lzLm5hKGJzdGF0cyldID0gMAojaXMubmFuKGJzdGF0cykKCmJzdGF0cyA8LSBic3RhdHMgJT4lCiAgbXV0YXRlKEtfUGVyY2VudCA9IFNPIC8gUEEpICU+JQogIG11dGF0ZShCQl9QZXJjZW50ID0gKEJCICsgSUJCKSAvIFBBKSAlPiUKICBtdXRhdGVfYWxsKH5yZXBsYWNlKC4sIGlzLm5hbiguKSwgMCkpCgpgYGAKCmBgYHtyfQpic3RhdHMgPC0gYnN0YXRzICU+JQogIG11dGF0ZV9hdCh2YXJzKEtfUGVyY2VudCwgQkJfUGVyY2VudCksIGZ1bnMocm91bmQoLiwgMykpKQpgYGAKCmBgYHtyfQpic3RhdHNfc2FsYXJ5IDwtIGJzdGF0cyAlPiUKICAgICAgICAgICAgICBmaWx0ZXIoeWVhcklEID49IDE5ODUpICU+JQogICAgICAgICAgICAgIGxlZnRfam9pbihzZWxlY3QoU2FsYXJpZXMsIHBsYXllcklELCB5ZWFySUQsIHRlYW1JRCwgc2FsYXJ5KSwgCiAgICAgICAgICAgICAgICAgICAgICAgICBieT1jKCJwbGF5ZXJJRCIsICJ5ZWFySUQiLCAidGVhbUlEIikpCgpic3RhdHNfc2FsYXJ5W2lzLm5hKGJzdGF0c19zYWxhcnkpXSA9IDAKc3RyKGJzdGF0c19zYWxhcnkpCgpgYGAKCmBgYHtyfQpic3RhdHNfc3VyZSA8LSBic3RhdHNfc2FsYXJ5ICU+JQogIGZpbHRlcihQQSA+IDE1MCkgJT4lCiAgc2VsZWN0KE9QUywgQkFCSVAsIEtfUGVyY2VudCwgQkJfUGVyY2VudCwgc2FsYXJ5KQpgYGAKCiMjIERhdGEgUHJlcGFyYXRpb24gKExlc3NvbiAxICYgMikKCmBgYHtyfQojS2VlcCBwbGF5ZXJzIHdpdGggb3ZlciAxNTAgYXQgYmF0cy4gKFdlIGNhbiBjaGFuZ2UgdGhpcyB2YWx1ZSBpZiBuZWNlc3NhcnkpLgojQ3JlYXRpbmcgYmF0dGluZyBhdmVyYWdlIHZhcmlhYmxlLgoKYmF0dGluZzEgPC0gYnN0YXRzICU+JQogIGZpbHRlcihBQiA+PSAxNTApCiAgCmBgYAoKYGBge3J9CmJzdGF0cyAlPiUKICBmaWx0ZXIocGxheWVySUQgPT0gImJvZ2FleGEwMSIpCmBgYAoKIyMgRXhwbG9yYXRvcnkgQW5hbHlzaXMgKExlc3NvbiAxICYgMikKTGVzc29ucyAxIGFuZCAyIHdpbGwganVzdCBiZSBwYXJ0cyBvZiB0aGUgb3ZlcmFsbCBwcm9qZWN0LiBTaW1wbGUgdGhpbmdzIGxpa2UgZGF0YSBtYW5pcHVsYXRpb24sIGFwcGx5IGZ1bmN0aW9ucywgYm94cGxvdHMsIGV0Yy4gVGhpcyB3aWxsIGJlIGRhdGEgcHJlcGFyYXRpb24gaXRlbXMgYW5kIGV4cGxvcmF0b3J5IGFuYWx5c2lzLgoKYGBge3J9CmIgPC0gZ2dwbG90KGJhdHRpbmcxLCBhZXMoeCA9IHRlYW1JRCwgeSA9IEhSKSkgKwogIGdlb21fYm94cGxvdChjb2wgPSAiYmxhY2siLCBhZXMoZmlsbCA9IHRlYW1JRCkpCmIKCmBgYAoKYGBge3J9CmhpdHRlcnMxIDwtIGJhdHRpbmcxICU+JQogIGZpbHRlcih5ZWFySUQgPCAxODk1KSAlPiUKICBzZWxlY3QoU2x1Z1BjdCkKCmhpdHRlcnMyIDwtIGJhdHRpbmcxICU+JQogIGZpbHRlcih5ZWFySUQgPiAxODk0LCB5ZWFySUQgPCAxOTIxKSAlPiUKICBzZWxlY3QoU2x1Z1BjdCkKCmhpdHRlcnMzIDwtIGJhdHRpbmcxICU+JQogIGZpbHRlcih5ZWFySUQgPiAxOTIwLCB5ZWFySUQgPCAxOTY5KSAlPiUKICBzZWxlY3QoU2x1Z1BjdCkKCmhpdHRlcnM0IDwtIGJhdHRpbmcxICU+JQogIGZpbHRlcih5ZWFySUQgPiAxOTY5KSAlPiUKICBzZWxlY3QoU2x1Z1BjdCkKI09yZ2FuaXppbmcgNCBkaWZmZXJlbnQgZGF0YXNldHMgbG9va2luZyBhdCBzbHVnZ2luZyBwZXJjZW50YWdlIGZvciB0aGUgZm9sbG93aW5nIGJveHBsb3RzLiBBbGwgb2YgdGhlc2UgYXJlIHNvbWV3aGF0IGRpZmZlcmVudCBlcmFzLCB3aXRoIHRoZSBtb3N0IGRyYW1hdGljIHNwbGl0IGJlaW5nIGZyb20gYmVmb3JlIDE5MjAgKHByZS1CYWJlIFJ1dGgpIGFuZCBhZnRlciAxOTIwIChkdXJpbmcgYW5kIHBvc3QtQmFiZSBSdXRoKQpgYGAKCmBgYHtyfQpib3hwbG90KGhpdHRlcnMxLAogICAgICAgIG1haW4gPSAiU2x1Z2dpbmcgcGVyY2VudGFnZSBmcm9tIGxhdGUgMTg3MSAtIDE4OTQiLAogICAgICAgIHlsYWIgPSAiU2x1Z2dpbmcgcGVyY2VudGFnZSIsCiAgICAgICAgY29sID0gImJsdWUiLAogICAgICAgIGhvcml6b250YWwgPSBUUlVFKQpgYGAKCmBgYHtyfQpib3hwbG90KGhpdHRlcnMyLCAKICAgICAgICBtYWluID0gIlNsdWdnaW5nIHBlcmNlbnRhZ2UgZnJvbSAxODk1LTE5MjAiLAogICAgICAgIHlsYWIgPSAiU2x1Z2dpbmcgcGVyY2VudGFnZSIsCiAgICAgICAgY29sID0gInllbGxvdyIsCiAgICAgICAgaG9yaXpvbnRhbCA9IFRSVUUpCmBgYAoKYGBge3J9CmJveHBsb3QoaGl0dGVyczMsIAogICAgICAgIG1haW4gPSAiU2x1Z2dpbmcgcGVyY2VudGFnZSBmcm9tIDE5MjEtMTk2OCIsCiAgICAgICAgeWxhYiA9ICJTbHVnZ2luZyBwZXJjZW50YWdlIiwKICAgICAgICBjb2wgPSAicmVkIiwKICAgICAgICBob3Jpem9udGFsID0gVFJVRSkKYGBgCgpgYGB7cn0KYm94cGxvdChoaXR0ZXJzNCwgCiAgICAgICAgbWFpbiA9ICJTbHVnZ2luZyBwZXJjZW50YWdlIGZyb20gMTk2OSAtIHByZXNlbnQiLAogICAgICAgIHlsYWIgPSAiU2x1Z2dpbmcgcGVyY2VudGFnZSIsCiAgICAgICAgY29sID0gInJlZCIsCiAgICAgICAgaG9yaXpvbnRhbCA9IFRSVUUpCmBgYAoKCmBgYHtyfQpzYXBwbHkoaGl0dGVyczEsIG1lYW4sIG5hLnJtID0gVCkKc2FwcGx5KGhpdHRlcnMyLCBtZWFuLCBuYS5ybSA9IFQpCnNhcHBseShoaXR0ZXJzMywgbWVhbiwgbmEucm0gPSBUKQpzYXBwbHkoaGl0dGVyczQsIG1lYW4sIG5hLnJtID0gVCkKI05vdGljZSB0aGF0IGdpZ2FudGljIGluY3JlYXNlIGJldHdlZW4gaGl0dGVyczIgYW5kIGhpdHRlcnMzCmBgYAoKYGBge3J9CnN1bW1hcnkoaGl0dGVyczEpCmBgYAoKYGBge3J9CnN1bW1hcnkoaGl0dGVyczIpCmBgYAoKYGBge3J9CnN1bW1hcnkoaGl0dGVyczMpCmBgYAoKYGBge3J9CnN1bW1hcnkoaGl0dGVyczQpCmBgYAoKYGBge3J9CiNLZWVwIGJhdHRpbmcgc3RhdHMgdGhhdCB3ZSB3YW50IGZvciBwYWlycy4KYmF0dGluZ19udW0gPC0gYnN0YXRzICU+JQogIGZpbHRlcihQQSA+PSAxNTApICU+JQogIHNlbGVjdCgiQkEiLCAnT0JQJywgJ1NsdWdQY3QnLCAiU08iLCAiQkIiLCAiSFIiKQogIApgYGAKCmBgYHtyfQpwYWlycyhiYXR0aW5nX251bSkKYGBgCiMjIyMgQ2FyZWVyIEJhdHRpbmcgU3RhdHMKYGBge3J9CmNhcmVlckJhdHRpbmcgPC0gbmEub21pdChic3RhdHMpCmBgYAoKYGBge3J9CmNhcmVlckJhdHRpbmcgPC0gY2FyZWVyQmF0dGluZyAlPiUKICBzZWxlY3QocGxheWVySUQsIEJBLCBQQSwgU2x1Z1BjdCwgT0JQLCBTTywgSFIpICU+JQogIGdyb3VwX2J5KHBsYXllcklEKSAlPiUKICBzdW1tYXJpc2VfYWxsKCdtZWFuJykKYGBgCgpgYGB7cn0KY2FyZWVyQmF0dGluZ19udW0gPC0gY2FyZWVyQmF0dGluZyAlPiUKICBmaWx0ZXIoUEEgPj0gMTUwKSAlPiUKICBzZWxlY3QoQkEsIFBBLCBTbHVnUGN0LCBPQlAsIFNPLCBIUikKCnBhaXJzKGNhcmVlckJhdHRpbmdfbnVtKQpgYGAKYGBge3J9CmNvcnJtYXRyaXggPC0gY29yKGJhdHRpbmdfbnVtKQpjb3JycGxvdChjb3JybWF0cml4LCBtZXRob2QgPSAnbnVtYmVyJykgI0dpdmVzIHVzIGNvcnJlbGF0aW9uIGZyb20gcGFpcnMgZ3JhcGguCmBgYAoKYGBge3J9CmNhcmVlckJhdHRpbmdfbnVtMSA8LSBjYXJlZXJCYXR0aW5nX251bSAlPiUKICBmaWx0ZXIoUEEgPiA1MDApCmBgYAoKCiMjIDAtZGltZW5zaW9uYWwgUmVkdWN0aW9uIChMZXNzb24gNCkKCgojIyMjIEJvb3RzdHJhcHBpbmcKCiMjIFBDQSAoTGVzc29uIDQpCmBgYHtyfQpyZXMgPC0gYmF0dGluZ19udW0gJT4lIHByY29tcChzY2FsZSA9IFRSVUUpCnJlcwpgYGAKCmBgYHtyfQpsb2FkaW5ncyA8LSByZXMkcm90YXRpb24KbG9hZGluZ3MKYGBgCgpgYGB7cn0Kc2NvcmVfbWF0IDwtIHJlcyR4CnNjb3JlX21hdApgYGAKCgpgYGB7cn0KZ2V0X2VpZyhyZXMpCmBgYAoKIyMjIyBTY3JlZXBsb3QKYGBge3J9CmdldF9laWcocmVzKSAlPiUKICBnZ3Bsb3QoYWVzKHggPSAxOjYsIHkgPSBjdW11bGF0aXZlLnZhcmlhbmNlLnBlcmNlbnQpKSArCiAgZ2VvbV9saW5lKCkgKwogIGdlb21fcG9pbnQoKSArCiAgZ2VvbV9obGluZSh5aW50ZXJjZXB0ID0gODApICsKICB4bGFiKCJQcmluY2lwYWwgQ29tcG9uZW50IikgKwogIHlsYWIoIlByb3BvcnRpb24gb2YgVmFyaWFuY2UgRXhwbGFpbmVkIikgKwogIGdndGl0bGUoIlNjcmVlIFBsb3Qgb2YgUHJpbmNpcGFsIENvbXBvbmVudCBmb3IgQmF0dGluZyBTdGF0aXN0aWNzIikKYGBgCgoyIFByaW5jaXBhbCBDb21wb25lbnRzOiBQQzEgYW5kIFBDMgoKYGBge3J9CmZ2aXpfc2NyZWVwbG90KHJlcywgbWFpbiA9ICJTY3JlZSBQbG90IikKYGBgCgpDYW4gSWRlbnRpZnkgYW4gZWxib3cgaW4gMy4KCiMjIyMgQmlwbG90CmBgYHtyfQpyZXMgJT4lCiAgZnZpel9wY2FfdmFyKGF4ZXMgPSBjKDEsMiksCiAgICAgICAgICAgICAgIGNvbC52YXIgPSAiY29udHJpYiIsCiAgICAgICAgICAgICAgIGdyYWRpZW50LmNvbHMgPSBjKCIjMDBBRkJCIiwgIiNFN0I4MDAiLCAiI0ZDNEUwNyIpLAogICAgICAgICAgICAgICByZXBlbCA9IFRSVUUKICAgICAgICAgICAgICAgKQpgYGAKCgojIyBDbHVzdGVyIEFuYWx5c2lzIChMZXNzb24gNSkKYGBge3J9CiNOT1QgQ09NUExFVEUhISEhISBUaGlzIHdhcyBqdXN0IGEgdGVzdCwgYnN0YXRzIGlzIHdheSB0b28gYmlnLgpic3RhdHNfYmVzdCA8LSBic3RhdHMgJT4lCiAgZmlsdGVyKFBBID49IDYwMCkKCmV1X2Rpc3QgPC0gZ2V0X2Rpc3QoY2FyZWVyQmF0dGluZ19udW0xLCBtZXRob2QgPSAnZXVjbGlkZWFuJykKYGBgCgpgYGB7cn0KaGNfY29tcGxldGUgPC0gaGNsdXN0KGV1X2Rpc3QsIG1ldGhvZCA9ICdjb21wbGV0ZScpCgpwbG90KGhjX2NvbXBsZXRlKQpgYGAKCiMjIyMgU2lsaG91ZXR0ZQoKYGBge3J9CnJlc190ZXN0IDwtIGNhcmVlckJhdHRpbmdfbnVtMSAlPiUga21lYW5zKDcpCiAgc3RyKHJlc190ZXN0KQpgYGAKCgpgYGB7cn0KZGlzdGFuY2UgPC0gZ2V0X2Rpc3QoY2FyZWVyQmF0dGluZ19udW0xLCBtZXRob2QgPSAiZXVjbGlkZWFuIikKc2lsIDwtIHNpbGhvdWV0dGUoeCA9IHJlc190ZXN0JGNsdXN0ZXIsIGRpc3QgPSBkaXN0YW5jZSkKc3VtbWFyeShzaWwpCnNpbCAlPiUgaGVhZCgpCmBgYAoKYGBge3J9CmZ2aXpfc2lsaG91ZXR0ZShzaWwpCmBgYAoKYGBge3J9CmZ2aXpfbmJjbHVzdChjYXJlZXJCYXR0aW5nX251bTEsIGhjdXQsIGhjX21ldGhvZCA9ICJjb21wbGV0ZSIsIGhjX21ldHJpYyA9ICJldWNsaWRlYW4iLCBtZXRob2QgPSAid3NzIikKYGBgCgpgYGB7cn0KIyNUaGlzIGlzIHRvIHRlc3Qgb3RoZXIgdmFsdWVzIG9mIEsgZm9yIHRoZSBzaWxob3VldHRlIG1ldGhvZC4KcmVzX3Rlc3QxIDwtIGNhcmVlckJhdHRpbmdfbnVtMSAlPiUga21lYW5zKDEwICkKICBzdHIocmVzX3Rlc3QxKQpgYGAKCgpgYGB7cn0KZGlzdGFuY2UgPC0gZ2V0X2Rpc3QoY2FyZWVyQmF0dGluZ19udW0xLCBtZXRob2Q9ImV1Y2xpZGVhbiIpCnNpbCA8LSBzaWxob3VldHRlKHggPSByZXNfdGVzdDEkY2x1c3RlciwgZGlzdCA9IGRpc3RhbmNlKQpzdW1tYXJ5KHNpbCkKc2lsICU+JSBoZWFkKCkKYGBgCgpgYGB7cn0KZnZpel9zaWxob3VldHRlKHNpbCkKYGBgCgoKIyMjIyBEaWFuYQoKIyMgTGluZWFyIFJlZ3Jlc3Npb24gKExlc3NvbiA2KQoKTGluZWFyIFJlZ3Jlc3Npb24gY29tcGFyaW5nIHRlYW0gcGF5cm9sbCBhbmQgd2luIHJhdGUuCmBgYHtyfQp0ZWFtcyA9IGFzLmRhdGEudGFibGUoVGVhbXMpCnRlYW1zID0gdGVhbXNbLCAuKHllYXJJRCwKICAgICAgICAgICAgICAgICAgbGdJRCA9IGFzLmNoYXJhY3RlcihsZ0lEKSwKICAgICAgICAgICAgICAgICAgdGVhbUlEID0gYXMuY2hhcmFjdGVyKHRlYW1JRCksCiAgICAgICAgICAgICAgICAgIGZyYW5jaElEID0gYXMuY2hhcmFjdGVyKGZyYW5jaElEKSwKICAgICAgICAgICAgICAgICAgUmFuaywgRywgVywgTCwgUiwgRVJBLCBTTywKICAgICAgICAgICAgICAgICAgV2luUGVyY2VudCA9IFcvKFcrTCkpXQoKc2FsYXJpZXMgPSBhcy5kYXRhLnRhYmxlKFNhbGFyaWVzKQpzYWxhcmllcyA9IHNhbGFyaWVzWywgYygibGdJRCIsICJ0ZWFtSUQiLCAic2FsYXJ5MU0iKSA6PQogICAgICAgICAgICAgICAgICAgICAgbGlzdChhcy5jaGFyYWN0ZXIobGdJRCksIGFzLmNoYXJhY3Rlcih0ZWFtSUQpLCBzYWxhcnkgLyAxZTZMKV0KcGF5cm9sbCA9IHNhbGFyaWVzWywgLihwYXlyb2xsID0gc3VtKHNhbGFyeTFNKSksIGJ5PS4odGVhbUlELCB5ZWFySUQpXQp0ZWFtUGF5cm9sbCA9IG1lcmdlKHRlYW1zLCBwYXlyb2xsLCBieSA9IGMoInRlYW1JRCIsICJ5ZWFySUQiKSkKYGBgCgpgYGB7cn0KZ2dwbG90KGRhdGEgPSB0ZWFtUGF5cm9sbCwgYWVzKHggPSBwYXlyb2xsLCB5ID0gV2luUGVyY2VudCkpICsgZ2VvbV9wb2ludCgpICArIGxhYnMoeCA9ICJQYXlyb2xsIChpbiBtaWxsaW9ucykiLCB5ID0gIldpbiBQZXJjZW50YWdlIikgKwogIGdlb21fc21vb3RoKG1ldGhvZCA9IGxtLCBzZSA9IEZBTFNFKQoKYGBgCmBgYHtyfQptb2RfbG0gPC0gbG0oZGF0YSA9IHRlYW1QYXlyb2xsLCBXaW5QZXJjZW50fnBheXJvbGwpCm1vZF9sbQpgYGAKCmBgYHtyfQpzdW1tYXJ5KG1vZF9sbSkKYGBgCmBgYHtyfQpwYXlyb2xsX3ByZWQgPC0gdGVhbVBheXJvbGwgJT4lCiAgYWRkX3ByZWRpY3Rpb25zKG1vZF9sbSkKCnBheXJvbGxfcHJlZCAlPiUKICBmaWx0ZXIoeWVhcklEID49IDIwMTApICU+JQogIGFycmFuZ2UoZGVzYyhwcmVkKSkgJT4lCiAgaGVhZCgyNSkKYGBgCmBgYHtyfQpwYXlyb2xsX3ByZWQgJT4lCiAgZmlsdGVyKHllYXJJRCA+PSAyMDEwKSAlPiUKICBhcnJhbmdlKGRlc2MoV2luUGVyY2VudCkpICU+JQogIGhlYWQoMjUpCmBgYApPbmx5IGZpdmUgdGVhbXMgYXJlIGluIHRoZSB0b3AgMjUgb2YgYm90aCBwYXlyb2xsIGFuZCB3aW4gcGVyY2VudGFnZSBpbiB0aGUgMjAxMHMuIFRoZXNlIHRlYW1zIGFyZSB0aGUgMjAxMSBQaGlsbGllcywgMjAxMSBZYW5rZWVzLCAyMDEwIFlhbmtlZXMsIDIwMTIgWWFua2VlcywgYW5kIDIwMTYgUmFuZ2Vycy4gVGhpcyBzaG93cyB0aGF0IHNwZW5kaW5nIHRoZSBtb3N0IG1vbmV5IGRvZXNuJ3QgYXV0b21hdGljYWxseSBtZWFuIHlvdSBhcmUgZ2V0dGluZyB0aGUgYmVzdCBwcm9kdWN0IG9uIHRoZSBmaWVsZC4KIyMgU2ltcGxlIExpbmVhciBSZWdyZXNzaW9uCgojIyBNdWx0aXBsZSBMaW5lYXIgUmVncmVzc2lvbgpgYGB7cn0KYnN0YXRzX3NhbGFyeSA8LSBic3RhdHNfc2FsYXJ5ICU+JQogIGZpbHRlcihQQSA+PSAxMDApICU+JQogIGZpbHRlcihzYWxhcnkgPiA1MDAwMDApCmBgYAoKCmBgYHtyfQpsbV9tb2QgPC0gbG0oc2FsYXJ5IH4gSCwgSFIsIGRhdGEgPSBic3RhdHNfc2FsYXJ5KQpzdW1tYXJ5KGxtX21vZCkKYGBgCmBgYHtyfQpsbV9tb2RfcHJkIDwtIGJzdGF0c19zYWxhcnkgJT4lIGFkZF9wcmVkaWN0aW9ucyhsbV9tb2QpCmxtX21vZF9wcmQKYGBgCmBgYHtyfQpmdWxsX21vZGVsIDwtIGxtKHNhbGFyeSB+LiwgZGF0YSA9IGJzdGF0c19zdXJlKQpzdW1tYXJ5KGZ1bGxfbW9kZWwpCmBgYApgYGB7cn0KZnVsbF9tb2RlbF9wcmVkIDwtIGJzdGF0c19zdXJlICU+JSBhZGRfcHJlZGljdGlvbnMoZnVsbF9tb2RlbCkKZnVsbF9tb2RlbF9wcmVkCmBgYApgYGB7cn0KYWR2X3N0YXRfbW9kIDwtIGxtKHNhbGFyeSB+IE9QUywgZGF0YSA9IGJzdGF0c19zYWxhcnkpCnN1bW1hcnkoYWR2X3N0YXRfbW9kKQpgYGAKCgojIyBSZXNhbXBsaW5nIE1ldGhvZHMKCiMjIEZlYXR1cmUgU2VsZWN0aW9uCgojIyBTYWxhcnkgRGF0YQpgYGB7cn0KZnJhbmNoaXNlIDwtIGMoYEFOQWAgPSAiTEFBIiwgYEFSSWAgPSAiQVJJIiwgYEFUTGAgPSAiQVRMIiwgCiAgICAgICAgICAgICAgIGBCQUxgID0gIkJBTCIsIGBCT1NgID0gIkJPUyIsIGBDQUxgID0gIkxBQSIsCiAgICAgICAgICAgICAgIGBDSEFgID0gIkNIQSIsIGBDSE5gID0gIkNITiIsIGBDSU5gID0gIkNJTiIsIAogICAgICAgICAgICAgICBgQ0xFYCA9ICJDTEUiLCBgQ09MYCA9ICJDT0wiLCBgREVUYCA9ICJERVQiLCAKICAgICAgICAgICAgICAgYEZMT2AgPSAiTUlBIiwgYEhPVWAgPSAiSE9VIiwgYEtDQWAgPSAiS0NBIiwgCiAgICAgICAgICAgICAgIGBMQUFgID0gIkxBQSIsIGBMQU5gID0gIkxBTiIsIGBNSUFgID0gIk1JQSIsIAogICAgICAgICAgICAgICBgTUlMYCA9ICJNSUwiLCBgTUlOYCA9ICJNSU4iLCBgTUw0YCA9ICJNSUwiLCAKICAgICAgICAgICAgICAgYE1PTmAgPSAiV0FTIiwgYE5ZQWAgPSAiTllBIiwgYE5ZTWAgPSAiTllOIiwgCiAgICAgICAgICAgICAgIGBOWU5gID0gIk5ZTiIsIGBPQUtgID0gIk9BSyIsIGBQSElgID0gIlBISSIsIAogICAgICAgICAgICAgICBgUElUYCA9ICJQSVQiLCBgU0ROYCA9ICJTRE4iLCBgU0VBYCA9ICJTRUEiLAogICAgICAgICAgICAgICBgU0ZHYCA9ICJTRk4iLCBgU0ZOYCA9ICJTRk4iLCBgU0xOYCA9ICJTTE4iLCAKICAgICAgICAgICAgICAgYFRCQWAgPSAiVEJBIiwgYFRFWGAgPSAiVEVYIiwgYFRPUmAgPSAiVE9SIiwKICAgICAgICAgICAgICAgYFdBU2AgPSAiV0FTIikKYGBgCgpgYGB7cn0KU2FsYXJpZXMkZnJhbmNoaXNlIDwtIHVubmFtZShmcmFuY2hpc2VbU2FsYXJpZXMkdGVhbUlEXSkKYGBgCgoKYGBge3J9CmF2Z190ZWFtX3NhbGFyaWVzIDwtIFNhbGFyaWVzICU+JQogICAgZ3JvdXBfYnkoeWVhcklELCBmcmFuY2hpc2UsIGxnSUQpICU+JQogICAgc3VtbWFyaXNlKHNhbGFyeSA9IG1lYW4oc2FsYXJ5KS8xZTYpICU+JQogICAgZmlsdGVyKCEoZnJhbmNoaXNlID09ICJDTEUiICYgbGdJRCA9PSAiTkwiKSkKYGBgCmBgYHtyfQpnZ3Bsb3QoYXZnX3RlYW1fc2FsYXJpZXMsIAogICAgICAgYWVzKHggPSB5ZWFySUQsIHkgPSBzYWxhcnksIGdyb3VwID0gZmFjdG9yKGZyYW5jaGlzZSkpKSArCiAgICAgICBnZW9tX3BhdGgoKSArCiAgICAgICBsYWJzKHggPSAiWWVhciIsIHkgPSAiQXZlcmFnZSB0ZWFtIHNhbGFyeSAobWlsbGlvbnMgVVNEKSIpCmBgYAoKCg==